This notebook is a rough outline of solutions to the applied exercises in Chapter 9 of An Introduction to Statistical Learning (James et al., 2013).
library(ISLR)
library(e1071)
library(cowplot)
library(latex2exp)
library(tidyverse)
library(patchwork)
theme_set(theme_bw())
# Given a filename, return its full path. The default folder is the `data`
# folder inside the current working directory.
#
# @param filename Character vector of file name(s).
# @return Character vector of path(s) of the form "<getwd()>/data/<filename>".
get_path <- function(filename) {
  # Idiom fix: file.path() is the base-R tool for building paths (it joins
  # with "/"), so this helper no longer needs stringr::str_c().
  file.path(getwd(), "data", filename)
}
# Load my functions:
source("/home/brian/R/dir/Util/bh_functions.R")
# Colors: hex codes for an ISLR-style palette used by plots throughout this
# notebook (presumably chosen to match the book's figures -- confirm).
ISLR_blue <- "#176FC0"
ISLR_brown <- "#94340E"
ISLR_lightblue <- "#68A5DB"
ISLR_orange <- "#CC5318"
# Functions for plotting `svm` objects:
# Signed distance from the point `p` = (x1, x2) to the line
# coef[1] + coef[2]*x1 + coef[3]*x2 = 0.
# The sign indicates which side of the line the point lies on; callers use
# which.max()/which.min() over these values to pick the extreme support
# vectors on either margin.
#
# @param coef Numeric length-3 vector: (intercept, x1 weight, x2 weight).
# @param p    Numeric length-2 point: (x1, x2).
# @return Unnamed numeric scalar (signed distance).
dist2point <- function(coef, p) {
  numerator <- coef[1] + coef[2] * p[1] + coef[3] * p[2]
  denominator <- sqrt(coef[2]^2 + coef[3]^2)
  unname(numerator / denominator)
}
# Given a fitted linear support-vector classifier `svc` (an e1071::svm
# object with two predictors), return c(slope, int0, int1, int2) where:
#   slope     - slope of the separating hyperplane when x1 is plotted on
#               the y-axis against x2 on the x-axis (as svm_plot() does);
#   int0      - intercept of the separating hyperplane;
#   int1/int2 - intercepts of the two margin lines, taken from the support
#               vectors with the largest / smallest signed distance.
# NOTE(review): svc$SV holds support vectors on the scale the model was fit
# on; with scale = TRUE (the e1071 default) these margins are only
# approximate in the original data units -- confirm against the data.
svc_margins <- function(svc) {
  b <- coef(svc)                    # c(intercept, w1, w2)
  slope <- -b[3] / b[2]             # from b[1] + b[2]*x1 + b[3]*x2 = 0
  # BUG FIX: the hyperplane intercept was previously -b[1]; solving the
  # hyperplane equation for x1 requires dividing by the x1 weight as well.
  int0 <- -b[1] / b[2]
  # Signed distance of every support vector to the hyperplane
  # (computed once instead of twice as before).
  dists <- apply(svc$SV, 1, function(v) dist2point(b, v))
  v1 <- svc$SV[which.max(dists), ]  # support vector on one margin
  v2 <- svc$SV[which.min(dists), ]  # support vector on the other margin
  int1 <- v1[1] - slope * v1[2]     # margin intercepts on the x1-vs-x2 axes
  int2 <- v2[1] - slope * v2[2]
  unname(c(slope, int0, int1, int2))
}
# Plot an e1071::svm fit over a two-predictor data frame `df`: a 100x100
# prediction grid rendered as background tiles, the observations on top
# (support vectors drawn as crosses), and -- for linear kernels -- the two
# margin lines from svc_margins().
#
# NOTE(review): the aes() calls below reference columns literally named
# `x1` and `x2`, so this assumes the first two columns of `df` are named
# x1/x2 (true for every data set in this notebook) -- confirm before reuse.
# It also assumes the class column is the *last* column of `df` and that
# its levels are "1" and "-1" (the color/tile palettes are keyed on those).
svm_plot <- function(svm, df) {
dat <- df
x1 <- dat[ , 1]
x2 <- dat[ , 2]
y <- dat[ , ncol(dat)]
# Dense grid spanning the observed predictor ranges, for the tile layer.
grid <- expand.grid(seq(min(x1), max(x1), length.out = 100),
seq(min(x2), max(x2), length.out = 100))
names(grid) <- names(dat)[1:2]
preds <- predict(svm, grid)
dat_ <- data.frame(grid, preds)
cols <- c('1' = '#f53224', '-1' = '#1a6eff') # 1 := red, -1 := blue; three shades darker on https://www.w3schools.com/colors/colors_picker.asp
tiles <- c('1' = '#F8766D', '-1' = '#619CFF')
shapes <- c('support' = 4, 'notsupport' = 1)
# Flag the support vectors; svm$index holds their row indices in `dat`.
dat$support <- 'notsupport'
dat[svm$index, 'support'] <- 'support'
# Note the axis swap: x2 on the horizontal axis, x1 on the vertical.
g <- ggplot(dat_, aes(x = x2, y = x1)) +
geom_tile(aes(fill = preds), color = "white", alpha = 0.3) +
scale_fill_manual(name = "Pred. Class", values = tiles)
if(svm$kernel == 0) { # if linear kernel add margins (0 appears to be e1071's internal code for "linear" -- confirm against the package source)
margins <- svc_margins(svm)
g <- g +
#geom_abline(aes(slope = margins[1], intercept = margins[2]), col = "green") + # line of separation
geom_abline(aes(slope = margins[1], intercept = margins[4]), col = cols[1]) + # 1 := red
geom_abline(aes(slope = margins[1], intercept = margins[3]), col = cols[2]) # -1 := blue
}
# Overlay the observations, colored by true class and shaped by
# support-vector status; bump fill alpha back to 1 in the legend only.
g <- g +
geom_point(data = dat, aes(color = y, shape = support), size = 2, show.legend = FALSE) +
scale_color_manual(values = cols) +
scale_shape_manual(values = shapes) +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
guides(fill = guide_legend(override.aes = list(alpha = 1))) +
theme_bw() +
ggtitle('SVM Classification Plot') +
theme(plot.title = element_text(hjust = 0.5))
g
}
# Return a character vector with the names of the numeric (integer or
# double) columns of a data frame.
numeric_cols <- function(df) {
  keep <- vapply(df, is.numeric, logical(1))
  names(df)[keep]
}
Generate a simulated two-class data set with 100 observations and two features in which there is a visible but non-linear separation between the two classes. Show that in this setting, a support vector machine with a polynomial kernel (with degree greater than 1) or a radial kernel will outperform a support vector classifier on the training data. Which technique performs best on the test data? Make plots and report training and test error rates in order to back up your assertions.
Generate data:
# Prelims: fix the RNG seed so the simulated data are reproducible.
set.seed(1)
# Data: two classes separated by a visible, quadratic (non-linear) boundary.
x1 <- rnorm(100)
x2 <- 4 * x1^2 + 1 + rnorm(100)     # quadratic trend plus noise
class <- sample(100, 50)            # indices of the positive class
x2[class] <- x2[class] + 3          # shift one class up ...
x2[-class] <- x2[-class] - 3        # ... and the other down
y <- rep(-1, 100)
y[class] <- 1
dat <- data.frame(x1 = x1,
                  x2 = x2,
                  y = as.factor(y))
# Errors: accumulators for the training/test error of each model below.
error_train <- c()
error_test <- c()
# Train/Test split: 80 training observations, 20 held out.
train <- sample(100, 80)
test <- -train                      # fixed: use `<-`, not `=`, for assignment
# Plots:
ggplot(dat) +
  geom_point(aes(x1, x2, group = y, color = y)) +
  theme_bw()
Train a support vector classifier (linear kernel) on a range of values for cost:
set.seed(1)
tune.out_4_svc <- tune(svm, y ~ ., data = dat[train, ], kernel = "linear", ranges = list(cost = c (0.001, 0.01, 0.1, 1, 5, 10, 100)))
summary(tune.out_4_svc)
Parameter tuning of ‘svm’:
- sampling method: 10-fold cross validation
- best parameters:
- best performance: 0.1125
- Detailed performance results:
NA
Extract the best model and print summary:
bestmod_4_svc <- tune.out_4_svc$best.model
summary(bestmod_4_svc)
Call:
best.tune(method = svm, train.x = y ~ ., data = dat[train, ], ranges = list(cost = c(0.001, 0.01, 0.1, 1,
5, 10, 100)), kernel = "linear")
Parameters:
SVM-Type: C-classification
SVM-Kernel: linear
cost: 0.1
Number of Support Vectors: 56
( 28 28 )
Number of Classes: 2
Levels:
-1 1
Plot the decision boundary:
#plot(bestmod_4_svc, dat[train, ])
svm_plot(bestmod_4_svc, dat[train, ])
The linear classifier fails to accurately capture the non-linearity of the decision boundary.
mean(dat[train, "y"] != predict(bestmod_4_svc, newdata = dat[train, ]))
[1] 0.1125
svc_error_train <- mean(summary(bestmod_4_svc)$fitted != y[train])
error_train <- c(error_train, "svc" = svc_error_train)
svc_error_train
[1] 0.1125
ypred <- predict(bestmod_4_svc, dat[test, ])
table(predict = ypred, truth = y[test])
truth
predict -1 1
-1 8 0
1 3 9
svc_error_test <- mean(ypred != y[test])
error_test <- c(error_test, "svc" = svc_error_test)
svc_error_test
[1] 0.15
Train a support vector machine (polynomial kernel, \(d\) = 2) on a range of values for cost:
set.seed(1)
tune.out_4_svm <- tune(svm, y ~ ., data = dat[train, ], kernel = "polynomial", degree = 2, ranges = list(cost = c (0.001, 0.01, 0.1, 1, 5, 10, 100)))
summary(tune.out_4_svm)
Parameter tuning of ‘svm’:
- sampling method: 10-fold cross validation
- best parameters:
- best performance: 0.2875
- Detailed performance results:
NA
Extract the best model and print summary:
bestmod_4_svm <- tune.out_4_svm$best.model
summary(bestmod_4_svm)
Call:
best.tune(method = svm, train.x = y ~ ., data = dat[train, ], ranges = list(cost = c(0.001, 0.01, 0.1, 1,
5, 10, 100)), kernel = "polynomial", degree = 2)
Parameters:
SVM-Type: C-classification
SVM-Kernel: polynomial
cost: 5
degree: 2
coef.0: 0
Number of Support Vectors: 63
( 32 31 )
Number of Classes: 2
Levels:
-1 1
Plot the decision boundary:
#plot(bestmod_4_svm, dat[train, ])
svm_plot(bestmod_4_svm, dat[train, ])
The degree-2 polynomial kernel captures some of the non-linearity of the decision boundary, but there are many errors.
#mean(dat[train, "y"] != predict(bestmod_4_svm, newdata = dat[train, ]))
svm_error_train <- mean(summary(bestmod_4_svm)$fitted != y[train])
error_train <- c(error_train, "svm (d = 2)" = svm_error_train)
svm_error_train
[1] 0.275
ypred <- predict(bestmod_4_svm, dat[test, ])
table(predict = ypred, truth = dat[test, "y"])
truth
predict -1 1
-1 6 2
1 5 7
svm_error_test <- mean(ypred != y[test])
error_test <- c(error_test, "svm (d = 2)" = svm_error_test)
svm_error_test
[1] 0.35
Train a support vector machine (polynomial kernel, \(d\) = 3) on a range of values for cost:
set.seed(1)
tune.out_4_svm_2 <- tune(svm, y ~ ., data = dat[train, ], kernel = "polynomial", degree = 3, ranges = list(cost = c (0.001, 0.01, 0.1, 1, 5, 10, 100)))
summary(tune.out_4_svm_2)
Parameter tuning of ‘svm’:
- sampling method: 10-fold cross validation
- best parameters:
- best performance: 0.3875
- Detailed performance results:
NA
Extract the best model and print summary:
bestmod_4_svm_2 <- tune.out_4_svm_2$best.model
summary(bestmod_4_svm_2)
Call:
best.tune(method = svm, train.x = y ~ ., data = dat[train, ], ranges = list(cost = c(0.001, 0.01, 0.1, 1,
5, 10, 100)), kernel = "polynomial", degree = 3)
Parameters:
SVM-Type: C-classification
SVM-Kernel: polynomial
cost: 1
degree: 3
coef.0: 0
Number of Support Vectors: 75
( 38 37 )
Number of Classes: 2
Levels:
-1 1
Plot the decision boundary:
#plot(bestmod_4_svm_2, dat[train, ])
svm_plot(bestmod_4_svm_2, dat[train, ])
The degree-3 polynomial kernel captures some of the non-linearity of the decision boundary, but there are many errors. It seems that a polynomial of degree 3 is worse than one of degree 2.
mean(dat[train, "y"] != predict(bestmod_4_svm_2, newdata = dat[train, ]))
[1] 0.4125
svm_2_error_train <- mean(summary(bestmod_4_svm_2)$fitted != y[train])
error_train <- c(error_train, "svm (d = 3)" = svm_2_error_train)
svm_2_error_train
[1] 0.4125
ypred <- predict(bestmod_4_svm_2, dat[test, ])
table(predict = ypred, truth = dat[test, "y"])
truth
predict -1 1
-1 1 1
1 10 8
svm_2_error_test <- mean(ypred != y[test])
error_test <- c(error_test, "svm (d = 3)" = svm_2_error_test)
svm_2_error_test
[1] 0.55
Train a support vector machine with a radial kernel on a range of values for cost and gamma:
set.seed(1)
tune.out_4_svm_rad <- tune(svm, y ~ ., data = dat[train, ], kernel = "radial", ranges = list(cost = c (0.001, 0.01, 0.1, 1, 5, 10, 100), gamma = c(0.5, 1, 2, 3, 4)))
summary(tune.out_4_svm_rad)
Parameter tuning of ‘svm’:
- sampling method: 10-fold cross validation
- best parameters:
- best performance: 0.025
- Detailed performance results:
NA
Extract the best model and print summary:
bestmod_4_svm_rad <- tune.out_4_svm_rad$best.model
summary(bestmod_4_svm_rad)
Call:
best.tune(method = svm, train.x = y ~ ., data = dat[train, ], ranges = list(cost = c(0.001, 0.01, 0.1, 1,
5, 10, 100), gamma = c(0.5, 1, 2, 3, 4)), kernel = "radial")
Parameters:
SVM-Type: C-classification
SVM-Kernel: radial
cost: 5
Number of Support Vectors: 17
( 8 9 )
Number of Classes: 2
Levels:
-1 1
Plot the decision boundary:
#plot(bestmod_4_svm_rad, dat[train, ])
svm_plot(bestmod_4_svm_rad, dat[train, ])
The radial kernel has very accurately captured the decision boundary. In fact there are no errors.
#mean(dat[train, "y"] != predict(bestmod_4_svm_rad, newdata = dat[train, ]))
svm_rad_error_train <- mean(summary(bestmod_4_svm_rad)$fitted != y[train])
error_train <- c(error_train, "svm (radial)" = svm_rad_error_train)
svm_rad_error_train
[1] 0
ypred <- predict(bestmod_4_svm_rad, dat[test, ])
table(predict = ypred, truth = dat[test, "y"])
truth
predict -1 1
-1 11 1
1 0 8
svm_rad_error_test <- mean(ypred != y[test])
error_test <- c(error_test, "svm (radial)" = svm_rad_error_test)
svm_rad_error_test
[1] 0.05
sort(error_train)
svm (radial) svc svm (d = 2) svm (d = 3)
0.0000 0.1125 0.2750 0.4125
sort(error_test)
svm (radial) svc svm (d = 2) svm (d = 3)
0.05 0.15 0.35 0.55
The SVM with radial kernel has the best training and test errors. However, the linear support vector classifier outperformed both of the SVM’s with polynomial kernels.
Redo the analysis with a more complex non-linear decision boundary to see if the polynomial kernels outperform the linear kernel:
# Prelims: reseed for reproducibility of this second simulation.
set.seed(1)
# Data: same quadratic backbone, but the positive class is split into a
# high cluster and a low cluster, giving a more complex boundary.
x1 <- rnorm(100)
x2 <- 4 * x1^2 + 1 + rnorm(100)
class <- sample(100, 50)
x2[class[1:25]] <- x2[class[1:25]] + 3
# BUG FIX: the second shift previously used class[25:50], so observation
# class[25] was shifted up by 3 and then down by 10. Using 26:50 makes the
# two halves disjoint. (Downstream printed results were generated with the
# old, overlapping split and may differ slightly after this fix.)
x2[class[26:50]] <- x2[class[26:50]] - 10
x2[-class] <- x2[-class] - 3
y <- rep(-1, 100)
y[class] <- 1
dat <- data.frame(x1 = x1,
                  x2 = x2,
                  y = as.factor(y))
# Errors: fresh accumulators for this experiment.
error_train <- c()
error_test <- c()
# Train/Test split: 80/20.
train <- sample(100, 80)
test <- -train                      # fixed: `<-` instead of `=`
# Plots:
ggplot(dat) +
  geom_point(aes(x1, x2, group = y, color = y)) +
  theme_bw()
set.seed(1)
tune.out_4_svc <- tune(svm, y ~ ., data = dat[train, ], kernel = "linear", ranges = list(cost = c (0.001, 0.01, 0.1, 1, 5, 10, 100)))
bestmod_4_svc <- tune.out_4_svc$best.model
#plot(bestmod_4_svc, dat[train, ])
svm_plot(bestmod_4_svc, dat[train, ])
The linear classifier fails to capture any of the non-linear decision boundary.
svc_error_train <- mean(summary(bestmod_4_svc)$fitted != y[train])
error_train <- c(error_train, "svc" = svc_error_train)
ypred <- predict(bestmod_4_svc, dat[test, ])
table(predict = ypred, truth = y[test])
truth
predict -1 1
-1 0 0
1 11 9
svc_error_test <- mean(ypred != y[test])
error_test <- c(error_test, "svc" = svc_error_test)
set.seed(1)
tune.out_4_svm <- tune(svm, y ~ ., data = dat[train, ], kernel = "polynomial", degree = 2, ranges = list(cost = c (0.001, 0.01, 0.1, 1, 5, 10, 100)))
bestmod_4_svm <- tune.out_4_svm$best.model
#plot(bestmod_4_svm, dat[train, ])
svm_plot(bestmod_4_svm, dat[train, ])
The degree-2 polynomial kernel captures some of the non-linearity of the decision boundary, but there are many errors.
#mean(dat[train, "y"] != predict(bestmod_4_svm, newdata = dat[train, ]))
svm_error_train <- mean(summary(bestmod_4_svm)$fitted != y[train])
error_train <- c(error_train, "svm (d = 2)" = svm_error_train)
ypred <- predict(bestmod_4_svm, dat[test, ])
table(predict = ypred, truth = dat[test, "y"])
truth
predict -1 1
-1 10 4
1 1 5
svm_error_test <- mean(ypred != y[test])
error_test <- c(error_test, "svm (d = 2)" = svm_error_test)
set.seed(1)
tune.out_4_svm_2 <- tune(svm, y ~ ., data = dat[train, ], kernel = "polynomial", degree = 3, ranges = list(cost = c (0.001, 0.01, 0.1, 1, 5, 10, 100)))
bestmod_4_svm_2 <- tune.out_4_svm_2$best.model
#plot(bestmod_4_svm_2, dat[train, ])
svm_plot(bestmod_4_svm_2, dat[train, ])
The degree-3 polynomial kernel captures some of the non-linearity of the decision boundary, but there are many errors.
svm_2_error_train <- mean(summary(bestmod_4_svm_2)$fitted != y[train])
error_train <- c(error_train, "svm (d = 3)" = svm_2_error_train)
ypred <- predict(bestmod_4_svm_2, dat[test, ])
svm_2_error_test <- mean(ypred != y[test])
error_test <- c(error_test, "svm (d = 3)" = svm_2_error_test)
set.seed(1)
tune.out_4_svm_rad <- tune(svm, y ~ ., data = dat[train, ], kernel = "radial", ranges = list(cost = c (0.001, 0.01, 0.1, 1, 5, 10, 100), gamma = c(0.5, 1, 2, 3, 4)))
bestmod_4_svm_rad <- tune.out_4_svm_rad$best.model
#plot(bestmod_4_svm_rad, dat[train, ])
svm_plot(bestmod_4_svm_rad, dat[train, ])
svm_rad_error_train <- mean(summary(bestmod_4_svm_rad)$fitted != y[train])
error_train <- c(error_train, "svm (radial)" = svm_rad_error_train)
ypred <- predict(bestmod_4_svm_rad, dat[test, ])
table(predict = ypred, truth = dat[test, "y"])
truth
predict -1 1
-1 11 1
1 0 8
svm_rad_error_test <- mean(ypred != y[test])
error_test <- c(error_test, "svm (radial)" = svm_rad_error_test)
sort(error_train)
svm (radial) svm (d = 2) svm (d = 3) svc
0.0125 0.2750 0.3500 0.4875
sort(error_test)
svm (radial) svm (d = 2) svm (d = 3) svc
0.05 0.25 0.40 0.55
The SVM with radial kernel has the best training and test errors. This time, with a more complex decision boundary, both of the SVM’s with polynomial kernels outperformed the linear support vector classifier.
Generate a data set with \(n = 500\) and \(p = 2\), such that the observations belong to two classes with a quadratic decision boundary between them:
# Simulate n = 500 points with two uniform predictors on [-0.5, 0.5];
# class 1 where x1^2 - x2^2 > 0, class 0 otherwise (a quadratic boundary).
set.seed(1)
x1 <- runif(500) - 0.5
x2 <- runif(500) - 0.5
y <- 1*((x1^2 - x2^2) > 0)
df <- data.frame(x1, x2, y = as.factor(y))
Plot the observations, colored according to their class labels:
g0 <- ggplot(df) +
geom_point(aes(x1, x2, group = y, color = y)) +
theme_bw() +
ggtitle("True Class Labels") +
theme(plot.title = element_text(hjust = 0.5)) +
scale_color_discrete(name = "True Class")
g0
Use all of the data as the training data
Fit a logistic regression model to the data, using \(X_1\) and \(X_2\) as predictors:
# Logistic regression on the raw predictors, classified at the usual
# 0.5 probability threshold.
logreg <- glm(y ~ x1 + x2, data = df, family = "binomial")
probs <- predict(logreg, newdata = df, type = "response")
preds <- numeric(500)     # all zeros (class 0) by default
preds[probs > 0.5] <- 1   # class 1 where the fitted probability exceeds 1/2
Apply this model to the training data in order to obtain a predicted class label for each training observation. Plot the observations, colored according to the predicted class labels. The decision boundary should be linear:
df$logreg_preds <- as.factor(preds)
g1 <- ggplot(df) +
geom_point(aes(x1, x2, group = logreg_preds, color = logreg_preds)) +
theme_bw() +
scale_color_discrete(name = "Pred. Class") +
ggtitle("Logistic Regression: y ~ x1 + x2") +
theme(plot.title = element_text(hjust = 0.5))
cowplot::plot_grid(g0, g1, nrow = 1)
Now fit a logistic regression model to the data using non-linear functions of \(X_1\) and \(X_2\) as predictors:
# Candidate non-linear logistic-regression specifications.
formulas <- c("y ~ I(x1^2) + I(x2^2)",
              "y ~ I(x1^0.5) + I(x2^0.5)",
              "y ~ x1 + x2 + I(x1^2) + I(x2^2)",
              "y ~ x1 + x2 + I(x1^0.5) + I(x2^0.5)",
              "y ~ x1 + x2 + I(x1^2) + I(x2^2) + I(x1 * x2)",
              "y ~ x1 + x2 + I(x1^0.5) + I(x2^0.5) + I(x1 * x2)"
)
# Fit each model and compute fitted probabilities on the training data.
logregs <- map(formulas, ~glm(as.formula(.x), data = df, family = "binomial"))
probs <- map(logregs, ~predict(.x, newdata = df, type = "response"))
# Idiom fix: the original preallocated `preds` and then rebuilt it with a
# map() whose body assigned into a *local copy* of `preds` (a no-op aside
# from its return value), followed by a separate fill loop. Building each
# 0/1 vector directly is equivalent and clearer.
preds <- map(probs, function(p) {
  out <- rep(0, 500)
  out[p > 0.5] <- 1
  out
})
With the models involving the squares of the predictors the predicted results look quite close to the true values. The models involving the square roots do not predict the classes well:
G2 <- vector("list", length(formulas))
for(i in 1:length(formulas)) {
df_ <- data.frame(x1, x2, preds = as.factor(preds[[i]]))
G2[[i]] <- ggplot(df_) +
geom_point(aes(x1, x2,
group = preds,
color = preds)) +
ggtitle(label = paste0(formulas[i])) +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5)) +
scale_color_discrete(name = "Pred. Class")
}
cowplot::plot_grid(plotlist = G2, nrow = 3)
Fit a support vector classifier to the data with \(X_1\) and \(X_2\) as predictors:
svc.out <- tune(svm, y ~ ., data = df, kernel = "linear", ranges = list(cost = c (0.001, 0.01, 0.1, 1, 5, 10, 100)))
svc <- svc.out$best.model
preds <- predict(svc, df)
df_ <- data.frame(x1, x2, preds = as.factor(preds))
g3 <- ggplot(df_) +
geom_point(aes(x1, x2,
group = preds,
color = preds)) +
ggtitle("Support Vector Classifier") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5)) +
scale_color_discrete(name = "Pred. Class")
cowplot::plot_grid(g0, g3, nrow = 1)
Fit a SVM using a non-linear kernel to the data.
svm.out <- tune(svm, y ~ ., data = df, kernel = "polynomial", degree = 2, ranges = list(cost = c (0.001, 0.01, 0.1, 1, 5, 10, 100)))
svm1 <- svm.out$best.model
preds <- predict(svm1, df)
df_ <- data.frame(x1, x2, preds = as.factor(preds))
g4 <- ggplot(df_) +
geom_point(aes(x1, x2,
group = preds,
color = preds)) +
ggtitle("Support Vector Machine - Polynomial Kernel (d = 2)") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5, size = 10)) +
scale_color_discrete(name = "Pred. Class")
cowplot::plot_grid(g0, g4, nrow = 1)
# Tune an SVM with a radial kernel over cost and gamma.
# (The `degree = 2` argument was dropped -- it only applies to polynomial kernels.)
svm.out2 <- tune(svm, y ~ ., data = df, kernel = "radial", ranges = list(cost = c(0.001, 0.01, 0.1, 1, 5, 10, 100), gamma = c(0.5, 1, 2, 3, 4)))
# BUG FIX: this previously extracted `svm.out$best.model`, i.e. the
# polynomial-kernel model tuned in the chunk above -- so the "Radial
# Kernel" plot was actually showing the polynomial model's predictions.
svm_rad <- svm.out2$best.model
preds <- predict(svm_rad, df)
df_ <- data.frame(x1, x2, preds = as.factor(preds))
g5 <- ggplot(df_) +
  geom_point(aes(x1, x2,
                 group = preds,
                 color = preds)) +
  ggtitle("Support Vector Machine - Radial Kernel") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5, size = 12)) +
  scale_color_discrete(name = "Pred. Class")
cowplot::plot_grid(g0, g5, nrow = 1)
The logistic regression model with linear combination of the variables and the support vector classifier both performed poorly. The logistic regression model with non-linear transformations of the variables, as well as support vector machines with polynomial and radial kernels all performed well.
g2 <- G2[[1]] + ggtitle("Logistic Regression: y ~ I(x1^2) + I(x2^2)")
cowplot::plot_grid(g0, g1, g2, g3, g4, g5, nrow = 3)
It’s hard to figure out exactly what the best type of data is for this question. Most of the data sets tried result in zero test error as the cost increases. Using the below data set with a modest \(n\) of 100 seems to work for what the question asks, i.e. the test error gets to a minimum before rising again as the cost increases.
# Prelim:
set.seed(1)
# Data:
n <- 100
x1 <- runif(n, max = 0.3, min = -0.3)
x2 <- rnorm(n, sd = 1)
y <- ifelse(-0.2 * x1 + x2 > 0, 1, -1)
df <- data.frame(x1, x2, y = as.factor(y))
# Train/Test:
train <- sample(n, 4*n/5)
test <- -train
# Plot:
ggplot(df) +
geom_point(aes(x1, x2, group = y, color = y)) +
theme_bw() +
ggtitle("True Class Labels") +
theme(plot.title = element_text(hjust = 0.5)) +
scale_color_discrete(name = "True Class", breaks = c(1, -1))
Generate support vector classifiers via cross validation using a range of values for cost (use all of the data for CV, then we will use the training/test splits in the next section):
costs <- c (0.001, 0.01, 0.1, 1, 5, 10, 100, 1e3, 1e4, 1e5)
svc.out <- tune(svm, y ~ ., data = df, kernel = "linear", ranges = list(cost = costs))
svc.cv.error <- svc.out$performances$error # extract the CV errors
svc.cv.error
[1] 0.44 0.25 0.05 0.06 0.06 0.05 0.04 0.03 0.02 0.02
The training and test errors can be obtained by training SVC models using a range of values for cost and then making predictions on the training and test data respectively:
svc.train.error <- vector("double", length(costs))
svc.test.error <- vector("double", length(costs))
G <- vector("list", length(costs))
for(i in 1:length(costs)) {
svc <- svm(y ~ ., data = df[train, ], kernel = "linear", cost = costs[i])
G[[i]] <- svm_plot(svc, df[train, ]) + ggtitle(str_c("cost: ", costs[i]))
# Training Error:
preds <- predict(svc, df[train, ])
train.error <- mean(preds != y[train])
svc.train.error[i] <- train.error
# Test Error:
preds <- predict(svc, df[test, ])
test.error <- mean(preds != y[test])
svc.test.error[i] <- test.error
}
svc.train.error
[1] 0.4375 0.2500 0.1000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
svc.test.error
[1] 0.45 0.35 0.00 0.05 0.05 0.05 0.05 0.05 0.05 0.05
plot_grid(plotlist = G, nrow = 5)
Plot the CV, training, and test errors:
library(latex2exp)
svc.errors <- data.frame(cost = log10(costs),
cv.error = svc.cv.error,
train.error = svc.train.error,
test.error = svc.test.error)
# see https://stackoverflow.com/a/50496899 and https://stackoverflow.com/a/10355844 for making legend
ggplot(svc.errors) +
geom_line(aes(cost, cv.error, color = "CV")) +
geom_line(aes(cost, train.error, color = "Train")) +
geom_line(aes(cost, test.error, color = "Test")) +
geom_hline(aes(yintercept = min(svc.test.error)), color = "grey30", linetype = "dashed") +
scale_color_manual(name = "Error Rate", breaks = c("Test", "Train", "CV"), values = c("Test" = "#00BA38", "CV" = "#F8766D", "Train" = "#619CFF")) +
#scale_color_discrete(name = NULL, labels = c("CV Error", "Test Error", "Training Error")) +
labs(title = "Comparison of CV, Training and Test Error Rates",
x = TeX("Log_{10}(Cost)"),
y = "Error Rate") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5),
legend.position = c(0.97, 0.95), legend.justification = c(1, 1))
The error rates approach zero as the cost increases from small values towards 1. Then, the test error starts to rise again, suggesting that a lower value for cost is better, even though the training error is zero for larger values.
View data:
Auto
Summary:
summary(Auto)
mpg cylinders displacement horsepower weight acceleration year origin
Min. : 9.00 Min. :3.000 Min. : 68.0 Min. : 46.0 Min. :1613 Min. : 8.00 Min. :70.00 Min. :1.000
1st Qu.:17.00 1st Qu.:4.000 1st Qu.:105.0 1st Qu.: 75.0 1st Qu.:2225 1st Qu.:13.78 1st Qu.:73.00 1st Qu.:1.000
Median :22.75 Median :4.000 Median :151.0 Median : 93.5 Median :2804 Median :15.50 Median :76.00 Median :1.000
Mean :23.45 Mean :5.472 Mean :194.4 Mean :104.5 Mean :2978 Mean :15.54 Mean :75.98 Mean :1.577
3rd Qu.:29.00 3rd Qu.:8.000 3rd Qu.:275.8 3rd Qu.:126.0 3rd Qu.:3615 3rd Qu.:17.02 3rd Qu.:79.00 3rd Qu.:2.000
Max. :46.60 Max. :8.000 Max. :455.0 Max. :230.0 Max. :5140 Max. :24.80 Max. :82.00 Max. :3.000
name
amc matador : 5
ford pinto : 5
toyota corolla : 5
amc gremlin : 4
amc hornet : 4
chevrolet chevette: 4
(Other) :365
Auto$origin <- as.factor(Auto$origin)
Auto$year <- factor(Auto$year, ordered = TRUE)
head(Auto)
Create a binary variable that takes on a 1 for cars with gas mileage above the median, and a 0 for cars with gas mileage below the median:
Auto$med01 <- as.factor(Auto$mpg > median(Auto$mpg))
Auto <- Auto[ , -1]
summary(Auto$med01)
FALSE TRUE
196 196
Fit a support vector classifier to the data with various values of cost, in order to predict whether a car gets high or low gas mileage:
set.seed(1)
costs <- c (0.001, 0.01, 0.1, 1, 5, 10, 50, 100, 250, 500, 1e3, 5e3, 1e4, 5e4, 1e5)
svc.out <- tune(svm, med01 ~ ., data = Auto, kernel = "linear", ranges = list(cost = costs))
svc.cv.error <- svc.out$performances$error # extract the CV errors
svc.cv.error
[1] 0.18141026 0.10211538 0.09955128 0.09192308 0.09442308 0.09955128 0.14032051 0.14288462 0.15570513 0.15570513 0.15570513
[12] 0.15570513 0.15570513 0.15570513 0.15570513
Plot CV errors:
ggplot(data.frame(cost = log10(costs), cv.error = svc.cv.error)) +
geom_line(aes(cost, cv.error), color = ISLR_orange) +
labs(title = "CV Error Rate as Function of Cost",
x = TeX("Log_{10}(Cost)"),
y = "CV Error Rate") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5))
Now repeat (b), this time using SVMs with radial and polynomial basis kernels, with different values of gamma and degree and cost.
set.seed(1)
costs <- c (0.001, 0.01, 0.1, 1, 5, 10, 50, 100, 250, 500, 1e3, 5e3, 1e4, 5e4, 1e5)
degrees <- c(2, 3, 4, 5)
svc.out.poly <- tune(svm, med01 ~ ., data = Auto, kernel = "polynomial", ranges = list(degree = degrees, cost = costs)) # first variable in ranges is the rows in below matrix
svc.poly.cv.error <- svc.out.poly$performances$error # extract the CV errors
round(matrix(svc.poly.cv.error, nrow = length(degrees)), 3)
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] [,15]
[1,] 0.551 0.551 0.551 0.551 0.551 0.549 0.352 0.281 0.250 0.228 0.135 0.097 0.082 0.089 0.089
[2,] 0.551 0.551 0.551 0.551 0.551 0.551 0.551 0.454 0.368 0.312 0.276 0.255 0.258 0.123 0.120
[3,] 0.551 0.551 0.551 0.551 0.551 0.551 0.551 0.551 0.551 0.551 0.551 0.498 0.470 0.383 0.350
[4,] 0.551 0.551 0.551 0.551 0.551 0.551 0.551 0.551 0.551 0.551 0.551 0.551 0.551 0.503 0.493
Plot CV errors:
dat_ <- data.frame(cbind(cost = log10(costs), t(matrix(svc.poly.cv.error, nrow = length(degrees))))) %>%
gather(key = "degree", value = "cv.error", V2:V5) %>%
mutate(degree = factor(str_replace(degree, "V", "degree: ")))
ggplot(dat_) +
geom_line(aes(cost, cv.error, color = degree)) +
labs(title = "Polynomial Kernel - CV Error Rate as Function of Cost",
x = TeX("Log_{10}(Cost)"),
y = "CV Error Rate") +
ggsci::scale_color_d3() +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5))
set.seed(1)
costs <- c(0.001, 0.01, 0.1, 1, 5, 10, 50, 100, 250, 500, 1e3, 5e3, 1e4, 5e4, 1e5)
gammas <- c(0.01, 0.05, 0.1, 0.5)   # fixed: `<-` instead of `=`
# BUG FIX: this is the *radial*-kernel experiment (see the object names,
# the gamma grid, and the "Radial Kernel" plot title below), but it was
# tuning kernel = "polynomial". The printed CV table that follows came
# from the old, mislabeled run and will differ after this fix.
svc.out.rad <- tune(svm, med01 ~ ., data = Auto, kernel = "radial", ranges = list(gamma = gammas, cost = costs)) # first variable in ranges is the rows in below matrix
svc.rad.cv.error <- svc.out.rad$performances$error # extract the CV errors
round(matrix(svc.rad.cv.error, nrow = length(gammas)), 3)
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] [,15]
[1,] 0.551 0.551 0.551 0.551 0.408 0.350 0.260 0.261 0.258 0.240 0.192 0.120 0.097 0.084 0.082
[2,] 0.551 0.551 0.337 0.255 0.230 0.143 0.110 0.097 0.084 0.084 0.079 0.079 0.079 0.079 0.079
[3,] 0.551 0.350 0.261 0.192 0.120 0.097 0.084 0.082 0.079 0.079 0.079 0.079 0.079 0.079 0.079
[4,] 0.255 0.143 0.097 0.079 0.079 0.079 0.079 0.079 0.079 0.079 0.079 0.079 0.079 0.079 0.079
Plot CV errors:
dat_ <- data.frame(cbind(cost = log10(costs), t(matrix(svc.rad.cv.error, nrow = length(gammas))))) %>%
gather(key = "gamma", value = "cv.error", V2:V5) %>%
mutate(gamma = str_c("gamma: ", factor(gammas[as.numeric(str_remove(gamma, "V")) - 1]))) # factor to make it a discrete var in ggplot()
ggplot(dat_) +
geom_line(aes(cost, cv.error, color = gamma)) +
labs(title = "Radial Kernel - CV Error Rate as Function of Cost",
x = TeX("Log_{10}(Cost)"),
y = "CV Error Rate") +
ggsci::scale_color_d3() +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5))
A support vector machine with a linear kernel and cost = 1 performs well, with a CV error of about 0.092.
A support vector machine with a polynomial kernel of degree two also achieves a low CV error of 0.082; however, the cost required is very large at 10,000.
A support vector machine with a radial kernel achieves the lowest CV error of 0.079, first reached at cost = 1 with gamma = 0.5. Other radial-kernel SVMs with very large cost values match this error, but gamma = 0.5 reaches it at a much lower cost, which would likely overfit less than the other models.
View data:
head(OJ)
Summary:
summary(OJ)
Purchase WeekofPurchase StoreID PriceCH PriceMM DiscCH DiscMM SpecialCH
CH:653 Min. :227.0 Min. :1.00 Min. :1.690 Min. :1.690 Min. :0.00000 Min. :0.0000 Min. :0.0000
MM:417 1st Qu.:240.0 1st Qu.:2.00 1st Qu.:1.790 1st Qu.:1.990 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000
Median :257.0 Median :3.00 Median :1.860 Median :2.090 Median :0.00000 Median :0.0000 Median :0.0000
Mean :254.4 Mean :3.96 Mean :1.867 Mean :2.085 Mean :0.05186 Mean :0.1234 Mean :0.1477
3rd Qu.:268.0 3rd Qu.:7.00 3rd Qu.:1.990 3rd Qu.:2.180 3rd Qu.:0.00000 3rd Qu.:0.2300 3rd Qu.:0.0000
Max. :278.0 Max. :7.00 Max. :2.090 Max. :2.290 Max. :0.50000 Max. :0.8000 Max. :1.0000
SpecialMM LoyalCH SalePriceMM SalePriceCH PriceDiff Store7 PctDiscMM
Min. :0.0000 Min. :0.000011 Min. :1.190 Min. :1.390 Min. :-0.6700 No :714 Min. :0.0000
1st Qu.:0.0000 1st Qu.:0.325257 1st Qu.:1.690 1st Qu.:1.750 1st Qu.: 0.0000 Yes:356 1st Qu.:0.0000
Median :0.0000 Median :0.600000 Median :2.090 Median :1.860 Median : 0.2300 Median :0.0000
Mean :0.1617 Mean :0.565782 Mean :1.962 Mean :1.816 Mean : 0.1465 Mean :0.0593
3rd Qu.:0.0000 3rd Qu.:0.850873 3rd Qu.:2.130 3rd Qu.:1.890 3rd Qu.: 0.3200 3rd Qu.:0.1127
Max. :1.0000 Max. :0.999947 Max. :2.290 Max. :2.090 Max. : 0.6400 Max. :0.4020
PctDiscCH ListPriceDiff STORE
Min. :0.00000 Min. :0.000 Min. :0.000
1st Qu.:0.00000 1st Qu.:0.140 1st Qu.:0.000
Median :0.00000 Median :0.240 Median :2.000
Mean :0.02731 Mean :0.218 Mean :1.631
3rd Qu.:0.00000 3rd Qu.:0.300 3rd Qu.:3.000
Max. :0.25269 Max. :0.440 Max. :4.000
Create a training set containing a random sample of 800 observations, and a test set containing the remaining observations:
# Prelims:
set.seed(1)
# Data:
df <- OJ
# Train/Test:
train <- sample(1:nrow(OJ), 800)
test <- -train
Fit a support vector classifier to the training data using cost = 0.01, with Purchase as the response and the other variables as predictors:
svc_8 <- svm(Purchase ~ . , data = df[train, ], kernel = "linear", cost = 0.01)
summary(svc_8)
Call:
svm(formula = Purchase ~ ., data = df[train, ], kernel = "linear", cost = 0.01)
Parameters:
SVM-Type: C-classification
SVM-Kernel: linear
cost: 0.01
Number of Support Vectors: 435
( 219 216 )
Number of Classes: 2
Levels:
CH MM
Training error:
svc_8.train.error <- mean(predict(svc_8, df[train, ]) != df$Purchase[train])
svc_8.train.error
[1] 0.175
Test error:
svc_8.test.error <- mean(predict(svc_8, df[test, ]) != df$Purchase[test])
svc_8.test.error
[1] 0.1777778
Use the tune() function to select an optimal cost. Consider values in the range 0.01 to 10.
set.seed(1)
costs <- c (0.01, 0.05, 0.1, 0.25, 0.5, 1, 2, 5, 10)
svc.out_8 <- tune(svm, Purchase ~ ., data = df[train, ], kernel = "linear", ranges = list(cost = costs))
svc_8.cv.error <- svc.out_8$performances$error # extract the CV errors
svc_8.cv.error
[1] 0.17625 0.17625 0.17250 0.17125 0.16875 0.17500 0.17250 0.17250 0.17375
Plot:
ggplot(data.frame(cost = log10(costs), cv.error = svc_8.cv.error)) +
geom_line(aes(cost, cv.error), color = ISLR_orange) +
labs(title = "Linear Kernel - CV Error Rate as Function of Cost",
x = TeX("Log_{10}(Cost)"),
y = "CV Error Rate") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5))
The training and test errors can be obtained by training SVC models using a range of values for cost and then making predictions on the training and test data respectively:
set.seed(1)
svc_8.train.error <- vector("double", length(costs))
svc_8.test.error <- vector("double", length(costs))
for(i in 1:length(costs)) {
svc <- svm(Purchase ~ ., data = df[train, ], kernel = "linear", cost = costs[i])
# Training Error:
preds <- predict(svc, df[train, ])
train.error <- mean(preds != df$Purchase[train])
svc_8.train.error[i] <- train.error
# Test Error:
preds <- predict(svc, df[test, ])
test.error <- mean(preds != df$Purchase[test])
svc_8.test.error[i] <- test.error
}
svc_8.train.error
[1] 0.17500 0.17000 0.16500 0.16500 0.16500 0.16375 0.16500 0.16625 0.16375
svc_8.test.error
[1] 0.1777778 0.1629630 0.1629630 0.1592593 0.1555556 0.1555556 0.1518519 0.1555556 0.1481481
Plot the CV, training, and test errors:
library(latex2exp)
# Gather the CV, training and test error rates for the linear kernel
svc_8.errors <- data.frame(
  cost = log10(costs),
  cv.error = svc_8.cv.error,
  train.error = svc_8.train.error,
  test.error = svc_8.test.error
)
# see https://stackoverflow.com/a/50496899 and https://stackoverflow.com/a/10355844 for making legend
g1 <- ggplot(svc_8.errors) +
  geom_line(aes(cost, cv.error, color = "CV")) +
  geom_line(aes(cost, train.error, color = "Train")) +
  geom_line(aes(cost, test.error, color = "Test")) +
  scale_color_manual(name = "Error Rate",
                     breaks = c("Test", "Train", "CV"),
                     values = c("Test" = "#00BA38", "CV" = "#F8766D", "Train" = "#619CFF")) +
  labs(title = "Linear Kernel - Comparison of CV, Training and Test Error Rates",
       x = TeX("Log_{10}(Cost)"),
       y = "Error Rate") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = c(0.97, 0.95),
        legend.justification = c(1, 1))
g1
For most values of cost, the training error is slightly higher than the test error.
set.seed(1)
# Fit a radial-kernel SVM with a small cost (0.01); default gamma
svm_8_rad <- svm(Purchase ~ ., data = df[train, ], kernel = "radial", cost = 0.01)
summary(svm_8_rad)
Call:
svm(formula = Purchase ~ ., data = df[train, ], kernel = "radial", cost = 0.01)
Parameters:
SVM-Type: C-classification
SVM-Kernel: radial
cost: 0.01
Number of Support Vectors: 634
( 319 315 )
Number of Classes: 2
Levels:
CH MM
Training error:
# Misclassification rate of the radial-kernel SVM on the training set
svm_8_rad.train.error <- mean(df$Purchase[train] != predict(svm_8_rad, df[train, ]))
svm_8_rad.train.error
[1] 0.39375
Test error:
# Misclassification rate of the radial-kernel SVM on the held-out test set
svm_8_rad.test.error <- mean(df$Purchase[test] != predict(svm_8_rad, df[test, ]))
svm_8_rad.test.error
[1] 0.3777778
Use CV:
set.seed(1)
# 10-fold cross-validation of the radial-kernel SVM over the cost grid
svm.out_8_rad <- tune(svm, Purchase ~ ., data = df[train, ],
                      kernel = "radial", ranges = list(cost = costs))
# One CV error rate per cost value
svm_8_rad.cv.error <- svm.out_8_rad[["performances"]][["error"]]
svm_8_rad.cv.error
[1] 0.39375 0.20250 0.18625 0.18000 0.16750 0.17125 0.17750 0.18000 0.18625
Plot CV error:
# CV error rate as a function of log10(cost), radial kernel
ggplot(data.frame(cost = log10(costs), cv.error = svm_8_rad.cv.error),
       aes(x = cost, y = cv.error)) +
  geom_line(color = ISLR_orange) +
  labs(title = "Radial Kernel - CV Error Rate as Function of Cost",
       x = TeX("Log_{10}(Cost)"),
       y = "CV Error Rate") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))
The training and test errors can be obtained by training SVM models using a range of values for cost and then making predictions on the training and test data respectively:
set.seed(1)
# Pre-allocate one slot per cost value for the training and test error rates
svm_8_rad.train.error <- vector("double", length(costs))
svm_8_rad.test.error <- vector("double", length(costs))
# seq_along() is safe even if `costs` were empty (1:length(x) would give c(1, 0))
for (i in seq_along(costs)) {
  svm_8 <- svm(Purchase ~ ., data = df[train, ], kernel = "radial", cost = costs[i])
  # Training error: misclassification rate on the training observations
  preds <- predict(svm_8, df[train, ])
  svm_8_rad.train.error[i] <- mean(preds != df$Purchase[train])
  # Test error: misclassification rate on the held-out observations
  preds <- predict(svm_8, df[test, ])
  svm_8_rad.test.error[i] <- mean(preds != df$Purchase[test])
}
svm_8_rad.train.error
[1] 0.39375 0.18250 0.17375 0.16000 0.14750 0.15125 0.14750 0.14625 0.14500
svm_8_rad.test.error
[1] 0.3777778 0.2333333 0.2037037 0.1962963 0.1777778 0.1851852 0.1814815 0.1777778 0.1851852
Plot the CV, training, and test errors:
library(latex2exp)
# Gather the CV, training and test error rates for the radial kernel
svm_8_rad.errors <- data.frame(
  cost = log10(costs),
  cv.error = svm_8_rad.cv.error,
  train.error = svm_8_rad.train.error,
  test.error = svm_8_rad.test.error
)
# see https://stackoverflow.com/a/50496899 and https://stackoverflow.com/a/10355844 for making legend
g2 <- ggplot(svm_8_rad.errors) +
  geom_line(aes(cost, cv.error, color = "CV")) +
  geom_line(aes(cost, train.error, color = "Train")) +
  geom_line(aes(cost, test.error, color = "Test")) +
  scale_color_manual(name = "Error Rate",
                     breaks = c("Test", "Train", "CV"),
                     values = c("Test" = "#00BA38", "CV" = "#F8766D", "Train" = "#619CFF")) +
  labs(title = "Radial Kernel - Comparison of CV, Training and Test Error Rates",
       x = TeX("Log_{10}(Cost)"),
       y = "Error Rate") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = c(0.97, 0.95),
        legend.justification = c(1, 1))
g2
With the radial kernel, the training error is lower than the test error.
set.seed(1)
# Fit a degree-2 polynomial-kernel SVM with a small cost (0.01)
svm_8_poly <- svm(Purchase ~ ., data = df[train, ],
                  kernel = "polynomial", degree = 2, cost = 0.01)
summary(svm_8_poly)
Call:
svm(formula = Purchase ~ ., data = df[train, ], kernel = "polynomial", degree = 2, cost = 0.01)
Parameters:
SVM-Type: C-classification
SVM-Kernel: polynomial
cost: 0.01
degree: 2
coef.0: 0
Number of Support Vectors: 636
( 321 315 )
Number of Classes: 2
Levels:
CH MM
Training error:
# Misclassification rate of the polynomial-kernel SVM on the training set
svm_8_poly.train.error <- mean(df$Purchase[train] != predict(svm_8_poly, df[train, ]))
svm_8_poly.train.error
[1] 0.3725
Test error:
# Misclassification rate of the polynomial-kernel SVM on the held-out test set
svm_8_poly.test.error <- mean(df$Purchase[test] != predict(svm_8_poly, df[test, ]))
svm_8_poly.test.error
[1] 0.3666667
Use CV:
set.seed(1)
# 10-fold cross-validation of the degree-2 polynomial-kernel SVM over the cost grid
svm.out_8_poly <- tune(svm, Purchase ~ ., data = df[train, ], kernel = "polynomial", degree = 2, ranges = list(cost = costs))
# Extract the CV errors. This assignment was missing: `svm_8_poly.cv.error` is
# used by the CV plot and the comparison data frame below but was never defined
# (the linear and radial sections both have the equivalent extraction step).
svm_8_poly.cv.error <- svm.out_8_poly$performances$error
svm_8_poly.cv.error
Plot CV error:
# CV error rate as a function of log10(cost), polynomial kernel (d = 2)
ggplot(data.frame(cost = log10(costs), cv.error = svm_8_poly.cv.error),
       aes(x = cost, y = cv.error)) +
  geom_line(color = ISLR_orange) +
  labs(title = "Polynomial Kernel (d = 2) - CV Error Rate as Function of Cost",
       x = TeX("Log_{10}(Cost)"),
       y = "CV Error Rate") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))
The training and test errors can be obtained by training SVM models with a degree-2 polynomial kernel using a range of values for cost and then making predictions on the training and test data respectively:
set.seed(1)
# Pre-allocate one slot per cost value for the training and test error rates
svm_8_poly.train.error <- vector("double", length(costs))
svm_8_poly.test.error <- vector("double", length(costs))
# seq_along() is safe even if `costs` were empty (1:length(x) would give c(1, 0))
for (i in seq_along(costs)) {
  svm_8 <- svm(Purchase ~ ., data = df[train, ], kernel = "polynomial", degree = 2, cost = costs[i])
  # Training error: misclassification rate on the training observations
  preds <- predict(svm_8, df[train, ])
  svm_8_poly.train.error[i] <- mean(preds != df$Purchase[train])
  # Test error: misclassification rate on the held-out observations
  preds <- predict(svm_8, df[test, ])
  svm_8_poly.test.error[i] <- mean(preds != df$Purchase[test])
}
svm_8_poly.train.error
[1] 0.37250 0.33125 0.30625 0.19125 0.18750 0.18250 0.16000 0.15500 0.15000
svm_8_poly.test.error
[1] 0.3666667 0.3074074 0.2962963 0.2333333 0.2259259 0.2222222 0.2111111 0.1814815 0.1888889
Plot the CV, training, and test errors:
library(latex2exp)
# Gather the CV, training and test error rates for the polynomial kernel
svm_8_poly.errors <- data.frame(
  cost = log10(costs),
  cv.error = svm_8_poly.cv.error,
  train.error = svm_8_poly.train.error,
  test.error = svm_8_poly.test.error
)
# see https://stackoverflow.com/a/50496899 and https://stackoverflow.com/a/10355844 for making legend
g3 <- ggplot(svm_8_poly.errors) +
  geom_line(aes(cost, cv.error, color = "CV")) +
  geom_line(aes(cost, train.error, color = "Train")) +
  geom_line(aes(cost, test.error, color = "Test")) +
  scale_color_manual(name = "Error Rate",
                     breaks = c("Test", "Train", "CV"),
                     values = c("Test" = "#00BA38", "CV" = "#F8766D", "Train" = "#619CFF")) +
  labs(title = "Polynomial Kernel (d = 2) - Comparison of CV, Training and Test Error Rates",
       x = TeX("Log_{10}(Cost)"),
       y = "Error Rate") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = c(0.97, 0.95),
        legend.justification = c(1, 1))
g3
With the polynomial kernel (d = 2), the training error is lower than the test error.
Comparison of three kernels:
# Put the three comparison plots side by side on a common y-axis scale
plot_grid(plotlist = list(
  g1 + ggtitle("Linear") +
    scale_y_continuous(limits = c(0.14, 0.4), breaks = seq(0.1, 0.4, by = 0.05)),
  g2 + ggtitle("Radial") +
    scale_y_continuous(limits = c(0.14, 0.4), breaks = seq(0.1, 0.4, by = 0.05)) +
    theme(axis.title.y = element_blank()),
  g3 + ggtitle("Polynomial") +
    scale_y_continuous(limits = c(0.14, 0.4), breaks = seq(0.1, 0.4, by = 0.05)) +
    theme(axis.title.y = element_blank())
), nrow = 1)
James G, Witten D, Hastie T, Tibshirani R (2013). An Introduction to Statistical Learning: with Applications in R. Springer, 1st edition.↩︎